

import os
import sys

import pandas as pd
import statsmodels.formula.api as smf

# Output directory for the generated table. Pass it as the first command-line
# argument; when no argument is given, fall back to the default Overleaf path.
try:
    overleaf_path = sys.argv[1]
except IndexError:
    # Only a missing argument selects the default; any other error propagates.
    overleaf_path = './../Apps/Overleaf/bbm/draft/reports/revision/'

#%% IMPORT DATA -------------------------------------------------------------------------
# Load the processed contracts CSV; its first column is used as the index.
df = pd.read_csv(
    filepath_or_buffer='./data_py/processed/contracts_final.csv',
    index_col=[0],
)

#%% SET FORMULAS FOR REGRESSIONS --------------------------------------------------------
# Regression formulas keyed by specification number. Each one drops the
# intercept (-1); specifications 1 and 2 add `value`, and 2 also adds `tau`.
formulas_by_reg = {
    0: 'mri ~ C(spec) + C(spec):C(boom) - 1',
    1: "mri ~ C(spec) + C(spec):C(boom) + value -1",
    2: "mri ~ C(spec) + C(spec):C(boom) + value + tau -1",
}

# Placeholder name -> value; filled by the regression loop and later passed
# to str.format on the LaTeX template.
summary_to_tex = {}

# Coefficient name (as looked up in the fitted results) -> base placeholder
# name used in the LaTeX template. Names absent from a given regression are
# skipped by the regression loop.
coefs_to_tex_names = {
    'C(spec)[low]': 'low',
    'C(spec)[mid]': 'mid',
    'C(spec)[high]': 'high',
    'C(spec)[low]:C(boom)[T.True]': 'low_boom',
    'C(spec)[mid]:C(boom)[T.True]': 'mid_boom',
    'C(spec)[high]:C(boom)[T.True]': 'high_boom',
    # Legacy interaction name; no formula above contains it.
    'gas:value': 'value',
    # The formulas above use the plain `value` term, so map it as well;
    # otherwise its coefficient is never exported.
    # NOTE(review): assumes `value` is a numeric column, so its coefficient
    # is named exactly 'value' -- confirm.
    'value': 'value',
    'tau': 'total_days_description',
    'reneg': 'reneg'
}

#%% DO REGRESSIONS ----------------------------------------------------------------------
def _significance_stars(p_value):
    """Return the star suffix for a p-value.

    ``***`` below 0.01, ``**`` below 0.05, ``*`` below 0.1, otherwise an
    empty string (a NaN p-value fails every comparison and gets no stars).
    """
    if p_value < 0.01:
        return "***"
    if p_value < 0.05:
        return "**"
    if p_value < 0.1:
        return "*"
    return ""


# Fit each specification by OLS with HC0 covariance and store R-squared,
# number of observations, starred coefficients and standard errors under
# "<name>_<spec>" keys in summary_to_tex.
for i, formula in formulas_by_reg.items():
    reg = smf.ols(formula=formula, data=df).fit(cov_type='HC0')
    summary_to_tex[f"r_{i}"] = round(reg.rsquared, 2)
    summary_to_tex[f"n_{i}"] = int(reg.nobs)

    for term, tex_name in coefs_to_tex_names.items():
        # Only a coefficient missing from this specification is skipped;
        # any other failure surfaces instead of being reported as "not in
        # regression".
        try:
            coef = reg.params[term]
        except KeyError:
            print(f"{term} not in regression {i}")
            continue

        stars = _significance_stars(reg.pvalues[term])
        summary_to_tex[f"{tex_name}_{i}"] = str(round(coef, 3)) + stars
        summary_to_tex[f"se_{tex_name}_{i}"] = str(round(reg.HC0_se[term], 3))

#%% PRODUCE THE TABLE -------------------------------------------------------------------
# Read the LaTeX template and substitute its {placeholders} with the collected
# results. str.format raises KeyError if the template references a
# placeholder that summary_to_tex does not contain.
with open('./src/tex/table_sorting.tex', 'r') as f:
    tex = f.read()
output = tex.format(**summary_to_tex)

# os.path.join adds a separator only when overleaf_path lacks a trailing one,
# so a command-line path given without the final slash still resolves to the
# intended tables/ directory.
with open(os.path.join(overleaf_path, 'tables/table_sorting.tex'), 'w') as f:
    f.write(output)
